# save_as_swebench_verified_test.py
from datasets import load_dataset
import json, os
from tqdm import tqdm

# Online alternative (requires access to the Hugging Face Hub):
# ds = load_dataset("princeton-nlp/SWE-bench_Verified", split="test")

# Replace the path below with your own local file path.
# The Hugging Face Hub is not reachable from this machine, so the split is
# loaded from a parquet file that was downloaded beforehand.
PARQUET_FILES = {
    "test": r"D:\JunTuan\zxli26\0303_swebench\swe-bench-verified\data\test-00000-of-00001.parquet",
}
ds = load_dataset("parquet", data_files=PARQUET_FILES, split="test")

# Output locations, named once so the directory, the file that is written and
# the final message cannot drift apart.
OUTPUT_DIR = "swebench"
OUTPUT_PATH = "swebench/公开测评集-swe-bench-verified.jsonl"

os.makedirs(OUTPUT_DIR, exist_ok=True)

# Write the tasks as JSONL (one JSON object per line) so that later steps can
# read them back one record at a time.
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    progress = tqdm(ds, desc="swe-bench-verified 内容提取中...")
    for ins in progress:
        # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes.
        serialized = json.dumps(ins, ensure_ascii=False)
        f.write(f"{serialized}\n")

print(f"共 {len(ds)} 条任务，已保存到 {OUTPUT_PATH}")
